////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

// PREPARE DATA FOR: 


// "Deforestation in the Amazon: A Unified Framework for Estimation and Policy Analysis"


// by Eduardo Souza-Rodrigues, University of Toronto


// November 2018


clear

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

// SET PATH

* To run from a specific folder, uncomment the next line and point it at the
* directory that holds the .dta files this script loads with relative paths.
*global BasePath "C:\FILE"

* Change directory only when BasePath has actually been defined. While the
* global above stays commented out, "$BasePath" expands to an empty string and
* an unguarded chdir would not select the data folder (on Mac/Unix an empty
* cd moves to the home directory). With the guard, the script simply keeps
* running in the current working directory.
if `"$BasePath"' != "" {
	chdir `"$BasePath"'
}

set more off

////////////////////////////////////////////////////////////////////////////////


// I. Generate Summary Statistics (Tables 1 and 2) ******************************

* Read the main data set
	use main_data.dta, clear

* Table 1: summary statistics
	summarize nfarms prop_farm prop_def_farm cost_port dist_port dist_cap	///
		temp rain alt slope soil* mining powerplant pop fines*			///
		dist_ibama dist_pa title carbon_f carbon_def

* Share of the total number of farms, one variable per size class
	foreach size in s sm ml l {
		generate share_number_`size' = nfarms_`size' / nfarms
	}

* Share of total farm area, one variable per size class.
* The denominator (sum of the four class areas) is held in a double-precision
* temporary variable so each ratio equals the one obtained by writing the sum
* inline.
	tempvar area_all
	generate double `area_all' = area_s + area_sm + area_ml + area_l
	foreach size in s sm ml l {
		generate share_area_`size' = area_`size' / `area_all'
	}

* Table 2: summary statistics by farm size.
* Small, small-medium and medium-large farms: observations with non-missing prop_def
	foreach size in s sm ml {
		summarize nfarms_`size' share_number_`size' share_area_`size'	///
			prop_def_`size' qha_`size' if prop_def_`size' != .
	}
* Large farms: additionally require qha_crops_l different from zero
	summarize nfarms_l share_number_l share_area_l prop_def_l qha_l	///
		if prop_def_l != . & qha_crops_l != 0
	
	
////////////////////////////////////////////////////////////////////////////////


// II. Generate Spatially Lagged Variables *************************************

* Read the main data set
	use main_data.dta, clear

* Cutoff distance (in km, see dunit(km) below) passed to every spgen call
	local cutoff 75

* Check the number of neighbors: spgen is applied to a constant equal to one
* with swm(bin) weights and the nostd option; the result is stored under
* the name W1neigbohrs (spelling kept as is)
	generate e = 1
	spgen e, lat(Lat) lon(Long) swm(bin) dist(`cutoff') dunit(km) approx nostd
	rename splag1_e W1neigbohrs
	drop e

* Spatially lagged covariates -- built ONLY for the variables effectively used
	local lagvars population mining powerplant dist_pa
	foreach x of local lagvars {

		* First-order lag with swm(pow 1) weights within the cutoff distance;
		* spgen's output splag1_<name> is renamed to W<name>
		spgen `x', lat(Lat) lon(Long) swm(pow 1) dist(`cutoff') dunit(km) approx
		rename splag1_`x' W`x'

		* A missing lag is recoded to zero
		replace W`x' = 0 if W`x' == .

	}

* Summarize every variable whose name starts with W
	summarize W*

* Save the data set with the spatial lags added
	save main_land_use_data.dta, replace


////////////////////////////////////////////////////////////////////////////////

// III. Prepare Data for Small Farms *******************************************

* Read the land-use data set (the clear option replaces any data in memory)
	use main_land_use_data.dta, clear

* Variables to retain, split into named lists
	local sizevars   prop_def_s qha_crops_s qha_s								///
		yields_rice_s yields_manioc_s yields_corn_s yields_soy_s yields_beans_s	///
		area_s
	local covars     cost_port alt temp rain slope soil2-soil5 pop mining		///
		powerplant powerplant_n dist_pa title dist_ibama fines_up2005			///
		fines_up2003 dist_port dist_cap carbon_diff
	local lagvars    Wpop Wmining Wpowerplant Wdist_pa
	local clustvars  cluster_ir cluster50-cluster150

	keep codibge `sizevars' `covars' `lagvars' `clustvars'

* Retain only observations where prop_def_s is not missing
	keep if prop_def_s != .

* Summarize the retained variables
	summarize codibge-cluster150

* Export for use in MATLAB (no header row, no quotes)
	outsheet using land_use_matlab_small.txt, nonames noquote replace

clear

////////////////////////////////////////////////////////////////////////////////

// IV. Prepare Data for Small-Medium Farms *************************************

* Read the land-use data set (the clear option replaces any data in memory)
	use main_land_use_data.dta, clear

* Variables to retain, split into named lists
	local sizevars   prop_def_sm qha_crops_sm qha_sm								///
		yields_rice_sm yields_manioc_sm yields_corn_sm yields_soy_sm				///
		yields_beans_sm area_sm
	local covars     cost_port alt temp rain slope soil2-soil5 pop mining		///
		powerplant powerplant_n dist_pa title dist_ibama fines_up2005			///
		fines_up2003 dist_port dist_cap carbon_diff
	local lagvars    Wpop Wmining Wpowerplant Wdist_pa
	local clustvars  cluster_ir cluster50-cluster150

	keep codibge `sizevars' `covars' `lagvars' `clustvars'

* Retain only observations where prop_def_sm is not missing
	keep if prop_def_sm != .

* Summarize the retained variables
	summarize codibge-cluster150

* Export for use in MATLAB (no header row, no quotes)
	outsheet using land_use_matlab_small_medium.txt, nonames noquote replace

clear

////////////////////////////////////////////////////////////////////////////////

// V. Prepare Data for Medium-Large Farms **************************************

* Read the land-use data set (the clear option replaces any data in memory)
	use main_land_use_data.dta, clear

* Variables to retain, split into named lists
	local sizevars   prop_def_ml qha_crops_ml qha_ml								///
		yields_rice_ml yields_manioc_ml yields_corn_ml yields_soy_ml				///
		yields_beans_ml area_ml
	local covars     cost_port alt temp rain slope soil2-soil5 pop mining		///
		powerplant powerplant_n dist_pa title dist_ibama fines_up2005			///
		fines_up2003 dist_port dist_cap carbon_diff
	local lagvars    Wpop Wmining Wpowerplant Wdist_pa
	local clustvars  cluster_ir cluster50-cluster150

	keep codibge `sizevars' `covars' `lagvars' `clustvars'

* Retain only observations where prop_def_ml is not missing
	keep if prop_def_ml != .

* Summarize the retained variables
	summarize codibge-cluster150

* Export for use in MATLAB (no header row, no quotes)
	outsheet using land_use_matlab_medium_large.txt, nonames noquote replace

clear

////////////////////////////////////////////////////////////////////////////////

// VI. Prepare Data for Large Farms ********************************************

* Read the land-use data set (the clear option replaces any data in memory)
	use main_land_use_data.dta, clear

* Variables to retain, split into named lists
	local sizevars   prop_def_l qha_crops_l qha_l								///
		yields_rice_l yields_manioc_l yields_corn_l yields_soy_l yields_beans_l	///
		area_l
	local covars     cost_port alt temp rain slope soil2-soil5 pop mining		///
		powerplant powerplant_n dist_pa title dist_ibama fines_up2005			///
		fines_up2003 dist_port dist_cap carbon_diff
	local lagvars    Wpop Wmining Wpowerplant Wdist_pa
	local clustvars  cluster_ir cluster50-cluster150

	keep codibge `sizevars' `covars' `lagvars' `clustvars'

* Retain only observations where prop_def_l is not missing and qha_crops_l
* is different from zero
	keep if prop_def_l != . & qha_crops_l != 0

* Summarize the retained variables
	summarize codibge-cluster150

* Export for use in MATLAB (no header row, no quotes)
	outsheet using land_use_matlab_large.txt, nonames noquote replace

clear


////////////////////////////////////////////////////////////////////////////////

// VII. Prepare Data for Other Regressions *************************************

* Read the land-use data set (the clear option replaces any data in memory)
	use main_land_use_data.dta, clear

* Variables to retain, split into named lists
	local propvars   prop_census prop_sat prop_sat_n prop_farm prop_clouds		///
		prop_sat_total
	local covars     cost_port alt temp rain slope soil2-soil5 pop mining		///
		powerplant powerplant_n dist_pa title dist_ibama fines_up2005			///
		fines_up2003 dist_port dist_cap
	local lagvars    Wpop Wmining Wpowerplant Wdist_pa
	local clustvars  cluster_ir cluster50-cluster150

	keep codibge `propvars' `covars' `lagvars' `clustvars'

* Summarize the retained variables
	summarize codibge-cluster150

* Export for use in MATLAB (no header row, no quotes)
	outsheet using land_use_matlab_variables.txt, nonames noquote replace

clear


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////


